library( geojsonio )   # read shapefiles
library( sp )          # work with shapefiles
library( sf )          # work with shapefiles - simple features format
library( mclust )      # cluster analysis 
library( tmap )        # theme maps
library( ggmap )       # map tiles 
library( ggplot2 )     # graphing 
library( ggthemes )    # nice formats for ggplots
library( dplyr )       # data wrangling 
library( pander )      # formatting RMD tables
library( tidycensus )

library( cartogram )  # spatial maps w/ tract size bias reduction
library( maptools )   # spatial object manipulation 

library(here)
### HELPER FUNCTIONS FOR LAYOUT

theme_bare <- theme(
  axis.line = element_blank(), 
  axis.text.x = element_blank(), 
  axis.text.y = element_blank(),
  axis.ticks = element_blank(), 
  axis.title.x = element_blank(), 
  axis.title.y = element_blank(),
  legend.text=element_text(size=7),
  legend.title=element_text(size=8),
  panel.background = element_blank(),
  panel.border = element_rect(colour = "gray", fill=NA, size=0.5)
)


# Multiple plot function
# http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
here()
## [1] "C:/Users/jdlecy/Dropbox/00 - PEDA/00 - GITHUB/usa-dorling-shapefiles"
census_api_key( "b431c35dad89e2863681311677d12581e8f24c24" )

crosswalk <- readRDS(here("data/data-raw/cbsa-crosswalk.rds") ) 

these.seattle <- crosswalk$msaname == "SEATTLE-BELLEVUE-EVERETT, WA"
these.fips <- crosswalk$fipscounty[ these.seattle ]
these.fips <- na.omit( these.fips )

state.fips <- substr( these.fips, 1, 2 )
county.fips <- substr( these.fips, 3, 5 )

seattle.pop <-
  get_acs( geography = "tract", 
           variables = "B01003_001", 
           state = "53", 
           county = county.fips[state.fips=="53"], 
           geometry = TRUE ) %>%
  select( GEOID, estimate ) %>%
  rename( POP = estimate )
census.dat <- read.csv( here("data/data-raw/LTDB_Std_2010_fullcount.csv" ))

# merge shapefile data with census data in new dataframe
seattle <- merge( seattle.pop, census.dat, by.x="GEOID", by.y="tractid" )
seattle2 <- seattle[ ! st_is_empty( seattle ) , ]
seattle.sp <- as_Spatial( seattle2 )
class( seattle.sp )
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
plot(seattle.sp)

Map Tiles

# project map and remove empty tracts
seattle <- spTransform( seattle.sp, CRS("+init=epsg:3395"))
seattle <- seattle[ seattle$POP != 0 & (! is.na( seattle$POP )) , ]

# convert census tract polygons to dorling cartogram
# no idea why k=0.03 works, but it does - default is k=5
seattle$pop.w <- seattle$POP / 9000 # max(msp.sp$POP)   # standardizes it to max of 1.5
seattle_dorling <- cartogram_dorling( x=seattle, weight="pop.w", k=0.05 )
plot( seattle_dorling )

dorling.map <- spTransform( seattle_dorling, CRS("+proj=longlat +datum=WGS84") )
plot( dorling.map )

class( dorling.map )
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
dorling.sf <-  as( dorling.map, "sf" )
class( dorling.sf )
## [1] "sf"         "data.frame"
### CREATE BOUNDING BOX

sf::st_crs( dorling.sf ) # already in WGS84
## Coordinate Reference System:
##   User input: unknown 
##   wkt:
## GEOGCRS["unknown",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ID["EPSG",6326]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433],
##         ID["EPSG",8901]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433,
##                 ID["EPSG",9122]]]]
# seattle <- sf::st_transform( seattle, crs = 4326 ) # WGS84 is crs 4326

sf::st_bbox( dorling.sf )
##       xmin       ymin       xmax       ymax 
## -122.72832   47.17889 -121.39524   48.38813
# needs names left, top, right, left for ggmap
bbox <- sf::st_bbox( dorling.sf )
names(bbox) <- c("left","bottom","right","top")




### GET MAP TILES

seattle_stamen <- ggmap::get_stamenmap( bbox=bbox, maptype="toner-lite", zoom=10 )
ggmap( seattle_stamen )

Choropleth Maps

### CHOROPLETH MAP

seattle_sf <- dorling.sf

pop.quant <- quantile( seattle_sf$POP, c(0,0.2,0.4,0.6,0.8,1) ) %>% round(0)
labels <- paste0( pop.quant[-length(pop.quant)], "-", pop.quant[-1] )
seattle_sf <- mutate( seattle_sf, pop.cat = cut( POP, breaks=pop.quant, labels=labels ) ) 


seattle.sp <- spTransform( seattle.sp, CRS("+proj=longlat +datum=WGS84") )
seattle_sp <- dorling.sf <-  as( seattle.sp, "sf" )

pop.quant <- quantile( seattle_sp$POP, c(0,0.2,0.4,0.6,0.8,1) ) %>% round(0)
labels <- paste0( pop.quant[-length(pop.quant)], "-", pop.quant[-1] )
seattle_sp <- mutate( seattle_sp, pop.cat = cut( POP, breaks=pop.quant, labels=labels ) ) 

ggplot( seattle_sf ) + 
    geom_sf( aes( fill=pop.cat ) ) +
    scale_fill_brewer( palette = "OrRd" ) 

ggplot( seattle_sp ) + 
    geom_sf( aes( fill=pop.cat ) ) +
    scale_fill_brewer( palette = "OrRd" ) 

### CHOROPLETH MAP WITH BACKGROUND TILES


# single map

ggmap( seattle_stamen, extent="device" ) +
    geom_sf( data=seattle_sf, col="darkorange4", alpha=0.5, inherit.aes = FALSE ) +
    # geom_sf( data=seattle_sf, aes( fill=pop.cat ), alpha=0.5, inherit.aes = FALSE ) +
    # scale_fill_brewer( type="seq", palette = "Blues" ) +
    ggtitle( "Dorling Cartogram" ) +
    theme_bare

Demographics

# multiple maps together

map1 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sp, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Purples" ) +
    ggtitle( "MAP1 Regular" ) +
    theme_bare

map2 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sf, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Purples" ) +
    ggtitle( "MAP1 Dorling" ) +
    theme_bare

map3 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sp, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Oranges" ) +
    ggtitle( "MAP2 Regular" ) +
    theme_bare

map4 <-
    ggmap( seattle_stamen, extent="device" ) +
    geom_sf( data=seattle_sf, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Oranges" ) +
    ggtitle( "MAP2 Dorling" ) +
    theme_bare


multiplot( map1, map3, map2, map4, cols=2 )

Color Options

# multiple maps together

map1 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sp, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Greens" ) +
    ggtitle( "MAP1" ) +
    theme_bare

map2 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sf, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Purples" ) +
    ggtitle( "MAP2" ) +
    theme_bare

map3 <-
    ggmap( seattle_stamen ) +
    geom_sf( data=seattle_sp, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Oranges" ) +
    ggtitle( "MAP3" ) +
    theme_bare

map4 <-
    ggmap( seattle_stamen, extent="device" ) +
    geom_sf( data=seattle_sf, aes( fill=pop.cat), alpha=0.5, inherit.aes = FALSE ) +
    scale_fill_brewer( type="seq", palette = "Blues" ) +
    ggtitle( "BLUES" ) +
    theme_bare


multiplot( map1, map3, map2, map4, cols=2 )

Create Dorling Function for Metros

# folder should be where RMD file is
# "C:/Users/jdlecy/Dropbox/00 - PEDA/00 - GITHUB/usa-dorling-shapefiles"
setwd( "usa-dorling-shapefiles/code" )

# load data
census_api_key( "b431c35dad89e2863681311677d12581e8f24c24" )
crosswalk <- readRDS( "../data/data-raw/cbsa-crosswalk.rds" ) 
census.dat <- read.csv( "../data/data-raw/LTDB_Std_2010_fullcount.csv" )

# fix leading zeros issue:
# convert both IDs to numeric so they both 
# drop the leading zeros

census.dat$tractid <- as.numeric( as.character( census.dat$tractid ) )

# inside the loop below: 
# d$GEOID <- as.numeric( as.character( d$GEOID ) )




options( tigris_use_cache = TRUE )

build_dorling_metro <- function( cbsa.name, include.plot=TRUE )
{

    these.counties  <- crosswalk$cbsaname == cbsa.name
    these.fips <- crosswalk$fipscounty[ these.counties ]
    these.fips <- na.omit( these.fips )

    state.fips <- substr( these.fips, 1, 2 )
    county.fips <- substr( these.fips, 3, 5 )

    # combine all counties
    d.sf <- NULL

    for( i in 1:length(these.fips) )
    {
       try(  

                d.temp <- 
            get_acs( geography = "tract", 
               variables = "B01003_001",  # population
               state = state.fips[i], 
               county = county.fips[i], 
               geometry = TRUE ) %>%
            select( GEOID, estimate ) %>%
            rename( POP = estimate )
           
           ) # end of try

       d.sf <- rbind( d.sf, d.temp )
    }


    # merge shapefile data with census data in new dataframe:
    # fix leading zeros problem
    # census.dat$tractid <- as.numeric( as.character( census.dat$tractid ) )
    d.sf$GEOID <- as.numeric( as.character( d.sf$GEOID ) )
    d.sf <- merge( d.sf, census.dat, by.x="GEOID", by.y="tractid", all.x=T )

    # convert sf to sp class
    d.sf <- d.sf[ ! st_is_empty( d.sf ) , ]
    d.sp <- as_Spatial( d.sf )

    # project map and remove empty tracts
    d.sp <- spTransform( d.sp, CRS("+proj=longlat +datum=WGS84") )
    d.sp  <- d.sp[ d.sp$POP != 0 & (! is.na( d.sp$POP )) , ]

    # save simple features shapefile as geojson
    metro.name <- tolower( cbsa.name )
    metro.name <- gsub( ",", "", metro.name )
    metro.name <- gsub( " ", "-", metro.name )
    file.name <- paste0( "../maps/metros-shapefile/", metro.name, "-sf.geojson" )
    geojson_write( d.sp, file=file.name, geometry="polygon" )

    # class( d.sp )
    # plot( d.sp)

    # project map and remove empty tracts
    # d.sp <- spTransform( d.sp, CRS("+init=epsg:3395") )
    # d.sp  <- d.sp[ d.sp$POP != 0 & (! is.na( d.sp$POP )) , ]

    # standardizes pop numbers for scaling
    d.sp$pop.w <- d.sp$POP / ( 2 * median(d.sp$POP) ) 
    total.pop <- sum( d.sp$POP, na.rm=T )
        k.scale <-  0.7 * ( 1 / ( log( total.pop ) - 10 ) ) 
        
    # convert census tract polygons to dorling cartogram
        d.sp <- spTransform( d.sp, CRS("+init=epsg:3395") )
    dorling.map <- cartogram_dorling( x=d.sp, weight="pop.w", k=k.scale )  # k=0.05

    # convert to WGS84 for GitHub capatability 
    metro.j <- spTransform( dorling.map, CRS("+proj=longlat +datum=WGS84") )

    # save dorling shapefile as geojson
    metro.name <- tolower( cbsa.name )
    metro.name <- gsub( ",", "", metro.name )
    metro.name <- gsub( " ", "-", metro.name )
    file.name <- paste0( "../maps/metros-dorling/", metro.name, "-dorling.geojson" )

    geojson_write( metro.j, file=file.name, geometry="polygon" )

    # plot the maps 
    if( include.plot )
    {
      par( mar=c(0,0,4,0), mfrow=c(1,2) )
      plot( d.sp, main=paste0("Census Tracts of \n", cbsa.name ) )
          plot( d.sp, border="gray80", main=paste0("Dorling Cartogram of \n", cbsa.name ) )
      plot( dorling.map, col=gray(0.5,0.5), add=TRUE  )
    }

    return( dorling.map )

}



# test the function: 
# fort.worth <- "Fort Worth-Arlington, TX"
# fw <- build_dorling_metro( cbsa.name = fort.worth )

Loop Over Metros

crosswalk <- read.csv( "https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv",  stringsAsFactors=F, colClasses="character" )

metro.areas <- unique( crosswalk$cbsaname )
metro.areas[ metro.areas == "" ] <- NA
metro.areas <- na.omit( metro.areas )

test.metros <- metro.areas[1:10]

for( j in test.metros )
{
  metro.j <- build_dorling_metro( cbsa.name = j )

  # plot metro with base layer ? 

}